### Finding gene modules that make cells susceptible to Zika infection
| title: “Finding gene modules that make cells susceptible to Zika infection” |
| tput: |
| html_document: default |
| thub_document: default |
| g_width: 12 |
| g_height: 4 |
Load the required R libraries:
library(Seurat)
library(Matrix)
library(EnsDb.Hsapiens.v75)
library(rhdf5)
library(dplyr)
library(ggplot2)
#library(infercnv)
Prior to this analysis we processed the raw sequencing data with cellranger/STAR aligner and ran an algorithm called CellBender on it to remove ambient RNA.
Here I 1.) load the data 2.) change the gene identifiers from ensemble ids to symbols 3.) construct a metadata matrix (with donor information, Zika exposure etc.) 4.) put the data and metadata into a Seurat object. 5.) load mitochondrial genes from a database for later use 6.) Remove doublets, by loading the doublet scores I got from an algorithm (called scrublet) I ran on the data before.
setwd('/home/jovyan/HB_ZIK/')
# glioblastoma_counts = read.delim('../data/ZikaGlioblastomas/tic-527/study5953-tic527-star-fc-genecounts.txt', row.names = 1)
# manifest = read.delim('../HB_ZIK/5953stdy_manifest_14517_170919_HB_ZIK_046_048_050_051.csv', sep = ',', skip = 8)
# manifest = manifest[manifest$SUPPLIER.SAMPLE.NAME != "",]
# scrublet_classification = read.delim('scrublet/SmartSeq_predicted_doublets.txt')
# glioblastoma_counts = glioblastoma_counts[,scrublet_classification != 1]
# glioblastoma_metadata = data.frame(matrix('', dim(glioblastoma_counts)[2],4))
# colnames(glioblastoma_metadata) = c('SampleName', 'Technology', 'ZikaExposure', 'Patient')
# glioblastoma_metadata[,1] = unlist(lapply(colnames(glioblastoma_counts), function(x) substring(x,2)))
# glioblastoma_metadata[,2] = rep('SmartSeq', dim(glioblastoma_counts)[2])
# glioblastoma_metadata[,3] = rep('TRUE', dim(glioblastoma_counts)[2])
# glioblastoma_metadata[,4] = substring(manifest$SUPPLIER.SAMPLE.NAME[match(glioblastoma_metadata$SampleName, manifest$SANGER.SAMPLE.ID)], 3, 4)
# patients = c('42','42','43', '43', '45', '45', '46', '46') # (info from sample tracker)
# geneNames = as.matrix(read.table(paste('/home/jovyan/data/HB_ZIK/HB_ZIK/cellranger302_count_32771_5953STDY855119', as.character(1), '_GRCh38-3_0_0_premrna/filtered_feature_bc_matrix/features.tsv', sep = '')))
#
# geneOccurence = table(geneNames[,2])
# for (gene in names(geneOccurence)){
# if (geneOccurence[gene] > 1){
# geneNames[which(geneNames[,2] == gene),2] = paste(geneNames[which(geneNames[,2] == gene),2], '(', geneNames[which(geneNames[,2] == gene),1], ')', sep = '')
# }
# }
# rownames(glioblastoma_counts) = geneNames[match(rownames(glioblastoma_counts), geneNames[,1]),2]
# glioblastoma_counts = glioblastoma_counts[geneNames[,2],]
# for (i in 1:8){
# print(i)
# # prefiltered count_matrix:
# data_subset <- as.matrix(Read10X_h5(filename = paste('../data/HB_ZIK/HB_ZIK/cellranger302_count_32771_5953STDY855119', as.character(i), '_GRCh38-3_0_0_premrna/output_filtered.h5', sep = ''), use.names = TRUE))
# scrublet_classification = read.delim(paste('scrublet/5953STDY855119', as.character(i), '_predicted_doublets.txt', sep = ''))
# print('removed doublets:')
# print(sum(scrublet_classification))
# data_subset = data_subset[,scrublet_classification != 1]
# sampleNames = rownames(data_subset)
# glioblastoma_counts = cbind(glioblastoma_counts, data_subset)
# metadata_subset = data.frame(matrix('', dim(data_subset)[2],4))
# colnames(metadata_subset) = c('SampleName', 'Technology', 'ZikaExposure', 'Patient')
# metadata_subset[,1] = colnames(data_subset)
# metadata_subset[,2] = rep('10X', dim(data_subset)[2])
# metadata_subset[,3] = rep('FALSE', dim(data_subset)[2])
# metadata_subset[,4] = rep(patients[i], dim(data_subset)[2])
# glioblastoma_metadata = rbind(glioblastoma_metadata, metadata_subset)
# }
# mitogenes <- genes(EnsDb.Hsapiens.v75, filter = ~ seq_name == "MT")$gene_id
# mitogenes = geneNames[,2][match(mitogenes, geneNames[,1])]
# mitogenes = mitogenes[!is.na(mitogenes)]
# percent.mt = colSums(glioblastoma_counts[rownames(glioblastoma_counts) %in% mitogenes,])/colSums(glioblastoma_counts)
# Glioblastoma <- CreateSeuratObject(glioblastoma_counts, project = 'HB_ZIK', min.cells = 0, min.features = 0)
# Glioblastoma$SampleName = colnames(glioblastoma_counts)
# Glioblastoma$Technology = glioblastoma_metadata$Technology
# Glioblastoma$ZikaExposure = glioblastoma_metadata$ZikaExposure
# Glioblastoma$Patient = glioblastoma_metadata$Patient
# Glioblastoma$percent.mt = percent.mt
# saveRDS(Glioblastoma, file = "../data/ZikaGlioblastomas/zikaGlioblastomas_SeuratObject.rds")
Glioblastoma = readRDS("../data/ZikaGlioblastomas/zikaGlioblastomas_SeuratObject.rds")
Glioblastoma = Glioblastoma[,!is.na(Glioblastoma$Patient)]
Glioblastoma = Glioblastoma[,Glioblastoma$Patient == '46']
The QC plots using number of detected genes, number of counts and percent of counts coming from mitochondrial genes (as a proxy for stress), show a couple of outlier cells, which I remove:
Glioblastoma.list <- SplitObject(Glioblastoma, split.by = 'Technology')
i = 1
VlnPlot(Glioblastoma.list[[i]], features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, group.by = 'Patient')
plot1 <- FeatureScatter(Glioblastoma.list[[i]], feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(Glioblastoma.list[[i]], feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))
Glioblastoma.list[[i]] <- subset(Glioblastoma.list[[i]], subset = nFeature_RNA > 2500 & nFeature_RNA < 12000 & nCount_RNA < 2*10^6 & percent.mt < 0.1)
VlnPlot(Glioblastoma.list[[i]], features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, group.by = 'Patient')
i = 2
VlnPlot(Glioblastoma.list[[i]], features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, group.by = 'Patient')
plot1 <- FeatureScatter(Glioblastoma.list[[i]], feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(Glioblastoma.list[[i]], feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))
Glioblastoma.list[[i]] <- subset(Glioblastoma.list[[i]], subset = nFeature_RNA > 500 & nFeature_RNA < 5000 & nCount_RNA > 0 & nCount_RNA < 4*10^5 & percent.mt < 0.1)
VlnPlot(Glioblastoma.list[[i]], features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, group.by = 'Patient')
The following normalizes, scales and select 2000 particularly variable genes for the 10x data:
Glioblastoma.10x = SplitObject(Glioblastoma.list[[2]], split.by = 'Patient')
for (i in 1){
Glioblastoma.10x[[i]] <- NormalizeData(Glioblastoma.10x[[i]], normalization.method = "LogNormalize", scale.factor = 10000)
Glioblastoma.10x[[i]] <- FindVariableFeatures(Glioblastoma.10x[[i]], selection.method = "vst", nfeatures = 2000)
top25 <- head(VariableFeatures(Glioblastoma.10x[[i]]), 25)
plot1 <- VariableFeaturePlot(Glioblastoma.10x[[i]])
plot2 <- LabelPoints(plot = plot1, points = top25, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))
all.genes <- rownames(Glioblastoma.10x[[i]])
Glioblastoma.10x[[i]] <- ScaleData(Glioblastoma.10x[[i]], features = all.genes)
}
The following normalizes, scales and select 2000 particularly variable genes for the SmartSeq data:
Glioblastoma.SmartSeq = SplitObject(Glioblastoma.list[[1]], split.by = 'Patient')
rm(Glioblastoma)
rm(Glioblastoma.list)
for (i in 1){
Glioblastoma.SmartSeq[[i]] <- NormalizeData(Glioblastoma.SmartSeq[[i]], normalization.method = "LogNormalize", scale.factor = 10000)
Glioblastoma.SmartSeq[[i]] <- FindVariableFeatures(Glioblastoma.SmartSeq[[i]], selection.method = "vst", nfeatures = 2000)
top25 <- head(VariableFeatures(Glioblastoma.SmartSeq[[i]]), 25)
plot1 <- VariableFeaturePlot(Glioblastoma.SmartSeq[[i]])
plot2 <- LabelPoints(plot = plot1, points = top25, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))
all.genes <- rownames(Glioblastoma.SmartSeq[[i]])
Glioblastoma.SmartSeq[[i]] <- ScaleData(Glioblastoma.SmartSeq[[i]], features = all.genes)
}
Calculate principal components:
for (i in 1){
Glioblastoma.10x[[i]] <- RunPCA(Glioblastoma.10x[[i]], features = VariableFeatures(object = Glioblastoma.10x[[i]]))
print(Glioblastoma.10x[[i]][["pca"]], dims = 1:5, nfeatures = 5)
VizDimLoadings(Glioblastoma.10x[[i]], dims = 1:2, reduction = "pca")
DimPlot(Glioblastoma.10x[[i]], reduction = "pca")
DimHeatmap(Glioblastoma.10x[[i]], dims = 1, cells = round(dim(Glioblastoma.10x[[i]])[2])/2, balanced = TRUE)
DimHeatmap(Glioblastoma.10x[[i]], dims = 1:15, cells = round(dim(Glioblastoma.10x[[i]])[2])/2, balanced = TRUE)
}
## PC_ 1
## Positive: ST6GALNAC3, MAN2A1, NEAT1, SPP1, PDK4
## Negative: SOX5, PTPRZ1, GPM6A, SNTG1, NRXN1
## PC_ 2
## Positive: DNAJC6, RNF220, PEX5L, TMTC2, SPOCK3
## Negative: CHST11, CELF2, NHSL1, SAT1, LRMDA
## PC_ 3
## Positive: IGFBP7, COL4A1, COL8A1, PRKG1, COL4A2
## Negative: LINC01322, GLCCI1, ADAMTSL1, FAM110B, SLC8A3
## PC_ 4
## Positive: DPP10, CHL1, CADPS, TNC, SHISA9
## Negative: EGFL7, MYO1B, ABCB1, ADGRL4, VWF
## PC_ 5
## Positive: DCLK1, ADGRV1, ROBO2, WDR49, PDZRN3
## Negative: ADARB2, SORCS1, HS6ST3, TMEM132D, RIMS2
Based on the JackStraw procedure I select 14 PCs for further analysis in both cases:
for (i in 1){
Glioblastoma.10x[[i]] <- JackStraw(Glioblastoma.10x[[i]], num.replicate = 100)
Glioblastoma.10x[[i]] <- ScoreJackStraw(Glioblastoma.10x[[i]], dims = 1:20)
JackStrawPlot(Glioblastoma.10x[[i]], dims = 1:20)
ElbowPlot(Glioblastoma.10x[[i]], ndims = 40)
}
This is the clustering step:
n_dimensions = 14
for (i in 1){
Glioblastoma.10x[[i]] <- FindNeighbors(Glioblastoma.10x[[i]], dims = 1:n_dimensions)
Glioblastoma.10x[[i]] <- FindClusters(Glioblastoma.10x[[i]], resolution = 0.5)
head(Idents(Glioblastoma.10x[[i]]), 5)
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10949
## Number of edges: 367618
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9091
## Number of communities: 13
## Elapsed time: 1 seconds
Clusters roughly agree with visual seperation on a UMAP plot:
for (i in 1){
Glioblastoma.10x[[i]] <- RunUMAP(Glioblastoma.10x[[i]], dims = 1:n_dimensions)
print(DimPlot(Glioblastoma.10x[[i]], reduction = "umap", label = TRUE))
}
Find and visualize top cluster markers:
i = 1
markers <- FindAllMarkers(Glioblastoma.10x[[i]], only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
top = markers %>% group_by(cluster) %>% top_n(n = 1, wt = avg_logFC)
pdf('figures/FeaturePlotUnbiasedClusterMarkers_10x_Patient46.pdf', width = 20, height = 10)
print(FeaturePlot(Glioblastoma.10x[[i]], features = top$gene))
dev.off()
## png
## 2
print(FeaturePlot(Glioblastoma.10x[[i]], features = top$gene))
top10<- markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
pdf('figures/HeatmapUnbiasedClusterMarkers_10x_Patient46.pdf', width = 20, height = 10)
print(DoHeatmap(Glioblastoma.10x[[i]], features = top10$gene, cells = 1:dim(Glioblastoma.10x[[i]])[2] + NoLegend()) +
theme(text = element_text(size = 5)))
dev.off()
## png
## 2
print(DoHeatmap(Glioblastoma.10x[[i]], features = top10$gene, cells = 1:dim(Glioblastoma.10x[[i]])[2] + NoLegend()) +
theme(text = element_text(size = 5)))
Vizualize a priori chosen markers of tumour subtypes and brain cell types:
options(stringsAsFactors = FALSE)
modules = read.delim('/home/jovyan/HB_ZIK/1-s2.0-S0092867419306877-mmc2.csv', skip = 4, sep = ',')
cell.markers = c(
'Neurons' = 'RBFOX3', # NeuN
'Immature Neurons' = 'DCX',
'NSC.1' = 'EMX2', # Apparently also in astrocytes
'NSC.2' = 'PAX6',
'Activated neurons' = 'FOS',
'NSC and Astrocytes' = 'GFAP',
'VGLUT1 excitatory' = 'SLC17A7',
'VGLUT2 excitatory' = 'SLC17A6',
'Layer I inhibitory' = 'RELN',
'Layer II excitatory' = 'LAMP5',
'Layer II/III-IV excitatory.1' = 'CUX1',
'Layer II/III-IV excitatory.2' = 'CUX2',
'Layer IV excitatory' = 'RORB',
'Layer Va excitatory' = 'PCP4',
'Layer Vb excitatory' = 'HTR2C',
'Layer VIa excitatory' = 'OPRK1',
'Layer VIb excitatory' = 'NR4A2',
'HPA Upper layer' = 'TPPP3',
'HPA Mid layer' = 'NECAB1',
'HPA Lower layer' = 'PCP4',
'GABAergic' = 'GAD1',
'PVALB interneuron' = 'PVALB',
'SST interneuron' = 'SST',
'5HT3aR interneuron' = 'HTR3A',
'VIP interneuron' = 'VIP', #Subset of 5HTraR
'Cholinergic' = 'ACHE',
'Dopaminergic' = 'TH',
'Serotinergic' = 'TPH1',
'Astrocytes' = 'AQP4',
'Oligodendrocytes.1' = 'PLP1',
'Oligodendrocytes.1' = 'MOG',
'OPC' = 'PDGFRA',
'Endothelial cells' = 'APOLD1',
'Microglia.1' = 'CCL4',
'Microglia.2' = 'CCL3',
'Microglia.3' = 'TMEM119',
'Microglia and astrocytes' = 'CX3CR1',
'pre-Bötzinger interneurons' = 'TACR1',
'T cells' = 'CD3G'
)
for (i in 1){
Glioblastoma.10x[[i]]$MESlike = colSums(Glioblastoma.10x[[i]]@assays$RNA@scale.data[rownames(Glioblastoma.10x[[i]]@assays$RNA@scale.data) %in% c(modules['MES2'][modules['MES2'] != ''], modules['MES1'][modules['MES1'] != '']),])
Glioblastoma.10x[[i]]$AClike = colSums(Glioblastoma.10x[[i]]@assays$RNA@scale.data[rownames(Glioblastoma.10x[[i]]@assays$RNA@scale.data) %in% modules['AC'][modules['AC'] != ''],])
Glioblastoma.10x[[i]]$OPClike = colSums(Glioblastoma.10x[[i]]@assays$RNA@scale.data[rownames(Glioblastoma.10x[[i]]@assays$RNA@scale.data) %in% modules['OPC'][modules['OPC'] != ''],])
Glioblastoma.10x[[i]]$NPClike = colSums(Glioblastoma.10x[[i]]@assays$RNA@scale.data[rownames(Glioblastoma.10x[[i]]@assays$RNA@scale.data) %in% c(modules['NPC2'][modules['NPC2'] != ''], modules['NPC1'][modules['NPC1'] != '']),])
features = c('MESlike', 'AClike', 'OPClike', 'NPClike', unlist(cell.markers))
count = 0
for (f in list(1:10, 11:20,21:30,31:43)){
count = count + 1
pdf(paste('figures/KnownMarkersFeaturePlot', as.character(count), '10x_Patient46.pdf', sep = ''), width = 20, height = 10)
print(FeaturePlot(Glioblastoma.10x[[i]], features = features[f]))
dev.off()
print(FeaturePlot(Glioblastoma.10x[[i]], features = features[f]))
}
}
Visualize a priori chosen markers in a heatmap:
pdf('figures/KnownMarkersHeatmap_10x_Patient46.pdf', width = 20, height = 10)
print(DoHeatmap(Glioblastoma.10x[[1]], features = c('MESlike', 'AClike', 'OPClike', 'NPClike', unlist(cell.markers))) + NoLegend())
dev.off()
## png
## 2
DoHeatmap(Glioblastoma.10x[[1]], features = c('MESlike', 'AClike', 'OPClike', 'NPClike', unlist(cell.markers))) + NoLegend()
Assign identity to cluster based on marker expression:
new.cluster.ids <- c("0: Oligodendrocytes1",
"1: Oligodendrocytes2",
"2: Microglia",
"3: OPClike1",
"4: OPClike2",
"5: Microglia",
"6: Oligodendrocytes3",
"7: AClike",
"8: Unclassified",
"9: OPClike3",
"10: Endothelial Cells",
"11: MESlike",
"12: NPClike")
names(new.cluster.ids) <- levels(Glioblastoma.10x[[1]])
Glioblastoma.10x[[1]] <- RenameIdents(Glioblastoma.10x[[1]], new.cluster.ids)
pdf('figures/UMAPwithClusterLabel_10x_Patient46.pdf', width = 20, height = 10)
print(DimPlot(Glioblastoma.10x[[1]], reduction = "umap", label = TRUE, pt.size = 0.5, label.size = 8) + NoLegend())
dev.off()
## png
## 2
DimPlot(Glioblastoma.10x[[1]], reduction = "umap", label = TRUE, pt.size = 0.5, label.size = 8) + NoLegend()
Integrate with smartSeq data:
reference.list <- list(Glioblastoma.10x[[1]], Glioblastoma.SmartSeq[[1]])
glioblastoma.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:n_dimensions)
glioblastoma.integrated <- IntegrateData(anchorset = glioblastoma.anchors, dims = 1:n_dimensions)
Visualize integrated data:
library(cowplot)
library(patchwork)
# switch to integrated assay. The variable features of this assay are automatically
# set during IntegrateData
DefaultAssay(glioblastoma.integrated) <- "integrated"
# Run the standard workflow for visualization and clustering
glioblastoma.integrated <- ScaleData(glioblastoma.integrated, verbose = FALSE)
glioblastoma.integrated <- RunPCA(glioblastoma.integrated, npcs = n_dimensions, verbose = FALSE)
glioblastoma.integrated <- RunUMAP(glioblastoma.integrated, reduction = "pca", dims = 1:n_dimensions)
p1 <- DimPlot(glioblastoma.integrated, reduction = "umap", group.by = "Technology")
p2 <- DimPlot(glioblastoma.integrated, reduction = "umap", label = TRUE,
repel = TRUE) + NoLegend()
pdf('figures/UMAPintegrated_Patient46.pdf', width = 20, height = 10)
print(p1 + p2)
dev.off()
## png
## 2
p1+p2
Now classify cells and make plot of cell proportions:
# Do not include unclassified cells in reference:
Reference = Glioblastoma.10x[[1]][,Idents(Glioblastoma.10x[[1]]) != '8: Unclassified']
glioblastoma.anchors <- FindTransferAnchors(reference = Reference, query = Glioblastoma.SmartSeq[[1]], dims = 1:n_dimensions)
predictions <- TransferData(anchorset = glioblastoma.anchors, refdata = Idents(Reference), dims = 1:n_dimensions)
Glioblastoma.SmartSeq[[1]] <- AddMetaData(Glioblastoma.SmartSeq[[1]], metadata = predictions)
Also make plots of integrations scores:
# Do not include unclassified cells in reference:
hist(unname(Glioblastoma.SmartSeq[[1]]$prediction.score.max[Glioblastoma.SmartSeq[[1]]$predicted.id == '3: OPClike1']),xlim = c(0,1), xlab = 'Classification Score', main = 'Histogram of OPClike1 Classification Scores')
hist(unname(Glioblastoma.SmartSeq[[1]]$prediction.score.max[Glioblastoma.SmartSeq[[1]]$predicted.id == '4: OPClike2']),xlim = c(0,1), xlab = 'Classification Score', main = 'Histogram of OPClike2 Classification Scores')
hist(unname(Glioblastoma.SmartSeq[[1]]$prediction.score.max[Glioblastoma.SmartSeq[[1]]$predicted.id == '5: Microglia']),xlim = c(0,1), xlab = 'Classification Score', main = 'Histogram of Microglia Classification Scores')
Finally visualize cell proportions in 10x and SmartSeq data:
# Do not include unclassified cells in reference:
tab = c(table(Idents(Reference)), table(Glioblastoma.SmartSeq[[1]]$predicted.id))
# create a dataset
tech <- c(rep("10X" , 12) , rep("SmartSeq" , 3))
celltype <- names(tab)
value <- unname(tab)
data <- data.frame(tech,celltype,value)
# Stacked + percent
pdf('figures/CellProportions_Patient46.pdf', width = 20, height = 10)
ggplot(data, aes(fill=celltype, y=value, x=tech)) + geom_bar(position="fill", stat="identity")
dev.off()
## png
## 2
ggplot(data, aes(fill=celltype, y=value, x=tech)) + geom_bar(position="fill", stat="identity")
To be sure we also cluster the SmartSeq data on its own:
Calculate principal components:
for (i in 1){
Glioblastoma.SmartSeq[[i]] <- RunPCA(Glioblastoma.SmartSeq[[i]], features = VariableFeatures(object = Glioblastoma.SmartSeq[[i]]))
print(Glioblastoma.SmartSeq[[i]][["pca"]], dims = 1:5, nfeatures = 5)
VizDimLoadings(Glioblastoma.SmartSeq[[i]], dims = 1:2, reduction = "pca")
DimPlot(Glioblastoma.SmartSeq[[i]], reduction = "pca")
DimHeatmap(Glioblastoma.SmartSeq[[i]], dims = 1, cells = round(dim(Glioblastoma.SmartSeq[[i]])[2])/2, balanced = TRUE)
DimHeatmap(Glioblastoma.SmartSeq[[i]], dims = 1:15, cells = round(dim(Glioblastoma.SmartSeq[[i]])[2])/2, balanced = TRUE)
}
## PC_ 1
## Positive: C1QB, TYROBP, C1QC, CAPG, AIF1
## Negative: KCNQ5, DMD, NLGN1, CREB5, MLLT3
## PC_ 2
## Positive: PARP14, TNFAIP2, NFKBIA, LYN, ZC3HAV1
## Negative: FDFT1, PDIA6, ACAT2, CLU, PON2
## PC_ 3
## Positive: ASPM, NDC80, UBE2C, KIF23, TTK
## Negative: HIST1H4H, LINC01170, TOX, SCG2, FOXN3
## PC_ 4
## Positive: IFIH1, PLA2G4A, PPM1K, GBP4, ISG20
## Negative: PSMC3IP, LINC01170, ASPM, THRB, GLRA2
## PC_ 5
## Positive: LAYN, ANXA1, LCTL, CD44, TCIM
## Negative: ONECUT2, DPP6, RIT2, AC004852.2, NRXN1
This is the clustering step:
n_dimensions = 14
for (i in 1){
Glioblastoma.SmartSeq[[i]] <- FindNeighbors(Glioblastoma.SmartSeq[[i]], dims = 1:n_dimensions)
Glioblastoma.SmartSeq[[i]] <- FindClusters(Glioblastoma.SmartSeq[[i]], resolution = 0.5)
head(Idents(Glioblastoma.SmartSeq[[i]]), 5)
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 284
## Number of edges: 8286
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7954
## Number of communities: 4
## Elapsed time: 0 seconds
Clusters roughly agree with visual seperation on a UMAP plot:
for (i in 1){
Glioblastoma.SmartSeq[[i]] <- RunUMAP(Glioblastoma.SmartSeq[[i]], dims = 1:n_dimensions)
print(DimPlot(Glioblastoma.SmartSeq[[i]], reduction = "umap", label = TRUE))
}
Find and visualize top cluster markers:
i = 1
markers_S <- FindAllMarkers(Glioblastoma.SmartSeq[[i]], only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
top = markers_S %>% group_by(cluster) %>% top_n(n = 1, wt = avg_logFC)
pdf('figures/FeaturePlotUnbiasedClusterMarkers_SmartSeq_Patient46.pdf', width = 20, height = 10)
print(FeaturePlot(Glioblastoma.SmartSeq[[i]], features = top$gene))
dev.off()
## png
## 2
print(FeaturePlot(Glioblastoma.SmartSeq[[i]], features = top$gene))
top10<- markers_S %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
pdf('figures/HeatmapUnbiasedClusterMarkers__SmartSeq_Patient46.pdf', width = 20, height = 10)
print(DoHeatmap(Glioblastoma.SmartSeq[[i]], features = top10$gene, cells = 1:dim(Glioblastoma.SmartSeq[[i]])[2] + NoLegend()) +
theme(text = element_text(size = 5)))
dev.off()
## png
## 2
print(DoHeatmap(Glioblastoma.SmartSeq[[i]], features = top10$gene, cells = 1:dim(Glioblastoma.SmartSeq[[i]])[2] + NoLegend()) +
theme(text = element_text(size = 5)))
Vizualize a priori chosen markers of tumour subtypes:
for (i in 1){
Glioblastoma.SmartSeq[[i]]$MESlike = colSums(Glioblastoma.SmartSeq[[i]]@assays$RNA@scale.data[rownames(Glioblastoma.SmartSeq[[i]]@assays$RNA@scale.data) %in% c(modules['MES2'][modules['MES2'] != ''], modules['MES1'][modules['MES1'] != '']),])
Glioblastoma.SmartSeq[[i]]$AClike = colSums(Glioblastoma.SmartSeq[[i]]@assays$RNA@scale.data[rownames(Glioblastoma.SmartSeq[[i]]@assays$RNA@scale.data) %in% modules['AC'][modules['AC'] != ''],])
Glioblastoma.SmartSeq[[i]]$OPClike = colSums(Glioblastoma.SmartSeq[[i]]@assays$RNA@scale.data[rownames(Glioblastoma.SmartSeq[[i]]@assays$RNA@scale.data) %in% modules['OPC'][modules['OPC'] != ''],])
Glioblastoma.SmartSeq[[i]]$NPClike = colSums(Glioblastoma.SmartSeq[[i]]@assays$RNA@scale.data[rownames(Glioblastoma.SmartSeq[[i]]@assays$RNA@scale.data) %in% c(modules['NPC2'][modules['NPC2'] != ''], modules['NPC1'][modules['NPC1'] != '']),])
features = c('MESlike', 'AClike', 'OPClike', 'NPClike', unlist(cell.markers))
count = 0
for (f in list(1:10, 11:20,21:30,31:43)){
count = count + 1
pdf(paste('figures/KnownMarkersFeaturePlot', as.character(count), '_SmartSeq_Patient46.pdf', sep = ''), width = 20, height = 10)
print(FeaturePlot(Glioblastoma.SmartSeq[[i]], features = features[f]))
dev.off()
print(FeaturePlot(Glioblastoma.SmartSeq[[i]], features = features[f]))
}
}
Visualize a priori chosen markers in a heatmap:
pdf('figures/KnownMarkersHeatmapSmartSeq_Patient46.pdf', width = 20, height = 10)
print(DoHeatmap(Glioblastoma.SmartSeq[[1]], features = c('MESlike', 'AClike', 'OPClike', 'NPClike', unlist(cell.markers))) + NoLegend())
dev.off()
## png
## 2
DoHeatmap(Glioblastoma.SmartSeq[[1]], features = c('MESlike', 'AClike', 'OPClike', 'NPClike', unlist(cell.markers))) + NoLegend()